# coding:utf-8
import requests
from fake_useragent import UserAgent
from lxml import etree
from random import randint
from time import sleep


def get_html(url):
    """Fetch *url* and return its decoded body, or None on a non-200 status.

    Sleeps 3-10 seconds before the request to throttle the crawl.

    :param url: absolute URL of the page to fetch
    :return: response text (utf-8 decoded) on HTTP 200, otherwise None
    """
    headers = {
        # Random desktop UA reduces the chance of being blocked as a bot.
        "User-Agent": UserAgent().random,
        "Cookie": "ci=1"
    }
    sleep(randint(3, 10))
    # timeout keeps the crawler from hanging forever on a dead connection
    # (the original call had no timeout at all).
    response = requests.get(url, headers=headers, timeout=15)
    response.encoding = 'utf-8'
    if response.status_code == 200:
        return response.text
    return None


def prase_index(html):
    """Parse the film index page and return absolute detail-page URLs.

    :param html: HTML text of the index page (https://maoyan.com/films/)
    :return: list of absolute URLs, one per film detail page
    """
    e = etree.HTML(html)
    # hrefs on the index page are site-relative, e.g. "/films/1234"
    all_url = e.xpath('''//dd/div[@class='movie-item film-channel']/a/@href''')
    # Use https to match the scheme of the index URL crawled in main();
    # the original "http://" forced a redirect on every detail request.
    return ["https://maoyan.com{}".format(url) for url in all_url]



def praseinfo(url):
    """Fetch one film detail page and print its name and genre text nodes.

    Bug fix: the header key was "UserAgent" — not a real HTTP header name —
    so the random user agent was never actually sent; it must be
    "User-Agent" (matching get_html above).

    :param url: absolute URL of a film detail page
    """
    headers = {
        "User-Agent": UserAgent().random
    }
    sleep(randint(3, 10))
    # timeout added for the same reason as in get_html: avoid hanging.
    response = requests.get(url, headers=headers, timeout=15)
    response.encoding = "UTF-8"
    html = response.text
    e = etree.HTML(html)
    # Both XPaths return lists (empty if the page layout changed or the
    # request was blocked) — printed as-is, matching the original output.
    name = e.xpath("//h1[@class='name']/text()")
    types = e.xpath("//li[@class='ellipsis']/a[@class='text-link']/text()")
    print("name:", name)
    print("types", types)



def main():
    """Crawl the Maoyan film index and print details for every film found."""
    index_url = "https://maoyan.com/films/"
    html = get_html(index_url)
    if html is None:
        # get_html returns None on any non-200 response; the original code
        # passed None into prase_index, crashing in etree.HTML(None).
        print("failed to fetch index page:", index_url)
        return
    movie_urls = prase_index(html)
    for url in movie_urls:
        praseinfo(url)


if __name__ == "__main__":
    main()


